# -*- encoding: utf-8 -*-
'''
Created on Apr 1, 2012

@author: LONG HOANG GIANG
'''
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '../'))
from CrawlerLib.http import Http
import commonlib
from lxml import etree
from readability.readability import Document
import re

# HTML skeleton for every saved page. ``{0}`` is the only format field and
# receives the serialized article markup. The literal must not contain any
# other curly braces, because it is filled in with ``str.format``.
template = '''<html>
<head>
<meta content="text/html;charset=utf-8" http-equiv="content-type" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="format-detection" content="telephone=no" />
<title>Lỡ tay chạm ngực con gái</title>
</head>
<body>
{0}
</body>
</html>
'''


def process(start, end, output_dir='/lotaychamnguccongai'):
    '''
    Download a range of pages of the story and save each one as HTML.

    For every page number from ``start`` to ``end`` (both inclusive) the page
    is requested through ``Http.getHtml``, reduced to its article node with
    ``commonlib.getArticleNode``, wrapped in the module-level ``template`` and
    written to ``<output_dir>/<page>.html``.

    @param start: first page number to download
    @param end: last page number to download (inclusive)
    @param output_dir: directory that receives the ``<page>.html`` files; it
        is created when missing. Defaults to the location that was previously
        hard-coded.
    '''
    if not os.path.isdir(output_dir):
        # 0o777 is the same mode as the old ``0777`` literal, spelled in the
        # form accepted by both Python 2.6+ and Python 3.
        os.makedirs(output_dir, 0o777)

    for page in range(start, end + 1):
        url = 'http://www.wattpad.com/76518?p={0}'.format(page)
        # NOTE(review): assumes getHtml/getArticleNode always succeed; a
        # failed fetch would surface as an exception from etree.tostring.
        articleNode = commonlib.getArticleNode(Http.getHtml(url))
        article = etree.tostring(articleNode, pretty_print=True, encoding='utf-8')
        html = template.format(article)
        # Promote the paragraph that repeats the story title to a heading.
        # The text is a plain literal, so str.replace is enough (no regex).
        # The heading previously read "gáis", a typo against the title.
        html = html.replace("<p>Lỡ tay chạm ngực con gái  </p>",
                            "<h1>Lỡ tay chạm ngực con gái </h1>")
        # ``with`` closes the file even when the write fails.
        with open(os.path.join(output_dir, '{0}.html'.format(page)), 'w') as f:
            f.write(html)

if __name__ == '__main__':
    # Download pages 1 through 32 (inclusive).
    process(1, 32)
    print('>> Finished')
    # os._exit() terminates without flushing stdio buffers, so flush
    # explicitly or the message above can be lost when stdout is piped.
    sys.stdout.flush()
    # Reaching this point means every page was written, so report success
    # (the previous status of 1 signalled failure to the calling shell).
    # NOTE(review): os._exit is kept rather than sys.exit in case the HTTP
    # layer leaves threads running that would block a normal exit - confirm.
    os._exit(0)